function [y_expect,y_expect_b] = expectations(beta_best,beta_best_b,y,v_unscaled,G)

% function for calculating expectations for binary generalized linear model
% based on computed beta values, and evaluating their difference with
% respect to the data

% Histograms of G(beta' v) are plotted for data in which y_i=0 and 1
% separately as well.





% Compute the expected values of each outcome based on the calculated beta
% values, and the explanatory variable data
y_expect = G(v_unscaled*transpose(beta_best));
y_expect_b = G(v_unscaled*transpose(beta_best_b));

% find individuals which have an outcome of 1 and of 0
outcome1_idx = find(y);
outcome0_idx = find(1-y);
% separate out the GLM-based data based on the actual outcome, and compute
% the y-values conditioned on whether the y-data was 0 or 1
for jj=1:length(outcome1_idx)
    y_expect_1(jj) = y_expect(outcome1_idx(jj));
    y_expect_b_1(jj) = y_expect_b(outcome1_idx(jj));
end
for jj=1:length(outcome0_idx)
    y_expect_0(jj) = y_expect(outcome0_idx(jj));
    y_expect_b_0(jj) = y_expect_b(outcome0_idx(jj));
end

% Some high-level statistics 
% samle ones
count_0 = numel(y_expect_0);
count_1 = numel(y_expect_1);
y0_below_p1 = numel(y_expect_0(y_expect_0<0.1)) / count_0;
y0_above_p9 = numel(y_expect_0(y_expect_0>0.9)) / count_0;
y1_below_p1 = numel(y_expect_1(y_expect_1<0.1)) / count_1;
y1_above_p9 = numel(y_expect_1(y_expect_1>0.9)) / count_1;
% non-samle ("bad") ones
count_b_0 = numel(y_expect_b_0);
count_b_1 = numel(y_expect_b_1);
y0_below_p1_b = numel(y_expect_b_0(y_expect_b_0<0.1)) / count_b_0;
y0_above_p9_b = numel(y_expect_b_0(y_expect_b_0>0.9)) / count_b_0;
y1_below_p1_b = numel(y_expect_b_1(y_expect_b_1<0.1)) / count_b_1;
y1_above_p9_b = numel(y_expect_b_1(y_expect_b_1>0.9)) / count_b_1;


% Plot histogram of original y (outcome) data and computed one (SAMLE)
figure(200)
title('Probability Densities using SAMLE')
subplot(2,2,1)
histogram(y,'Normalization','pdf')
xlim([0 1])
xlabel('$ y_i $','Interpreter','Latex')
ylabel('pdf')
ax = gca; ax.FontSize = 16;
subplot(2,2,2)
histogram(y_expect,'Normalization','pdf')
xlim([0 1])
xlabel('$ G(\beta^\top v_i) $','Interpreter','Latex')
ylabel('pdf','Interpreter','Latex')
ax = gca; ax.FontSize = 16;
subplot(2,2,3)
histogram(y_expect_0,'Normalization','pdf')
xlim([0 1])
xlabel('$ G(\beta^\top v_i) $','Interpreter','Latex')
ylabel('pdf of $ Y|y_i=0 $', 'Interpreter','Latex')
ax = gca; ax.FontSize = 16;
subplot(2,2,4)
histogram(y_expect_1,'Normalization','pdf')
xlim([0 1])
xlabel('$ G(\beta^\top v_i) $','Interpreter','Latex')
ylabel('pdf of $ Y|y_i=1 $', 'Interpreter','Latex')
ax = gca; ax.FontSize = 16;

% Parts of the above plotted as separate plots
figure(250)
histogram(y_expect_0,'Normalization','pdf')
xlim([0 1])
xlabel('$ G(\beta^\top v_i) $','Interpreter','Latex')
ylabel('density $ \, | \, y_i=0 $', 'Interpreter','Latex')
ax = gca; ax.FontSize = 16;
figure(251)
histogram(y_expect_1,'Normalization','pdf')
xlim([0 1])
xlabel('$ G(\beta^\top v_i) $','Interpreter','Latex')
ylabel('density $ \, | \, y_i=1 $', 'Interpreter','Latex')
ax = gca; ax.FontSize = 16;



% Plot histogram of original y (outcome) data and computed one (non-SAMLE
figure(300)
title('Probability Densities using non-SAMLE')
subplot(2,2,1)
histogram(y,'Normalization','pdf')
xlim([0 1])
xlabel('$ y_i $','Interpreter','Latex')
ylabel('pdf','Interpreter','Latex')
ax = gca; ax.FontSize = 16;
subplot(2,2,2)
histogram(y_expect,'Normalization','pdf')
xlim([0 1])
xlabel('$ G(\beta^\top v_i) $','Interpreter','Latex')
ylabel('pdf')
ax = gca; ax.FontSize = 16;
subplot(2,2,3)
histogram(y_expect_b_0,'Normalization','pdf')
xlim([0 1])
xlabel('$ G(\beta^\top v_i) $','Interpreter','Latex')
ylabel('pdf of $ Y|y_i=0 $', 'Interpreter','Latex')
ax = gca; ax.FontSize = 16;
subplot(2,2,4)
histogram(y_expect_b_1,'Normalization','pdf')
xlim([0 1])
xlabel('$ G(\beta^\top v_i) $','Interpreter','Latex')
ylabel('pdf of $ Y|y_i=1 $', 'Interpreter','Latex')
ax = gca; ax.FontSize = 16;



